-
Notifications
You must be signed in to change notification settings - Fork 69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
New extents implementation #212
Conversation
f206c73
to
3fa2214
Compare
You may just want to look at the new extents file not in the diff. I did just replace the file after all. |
Some perf tests: template <class Extents, class ... Args>
void extents_construction(benchmark::State& state, Extents, int R, Args ... args) {
for (auto _ : state) {
double val =0;
for(int r=0; r<R; r++) {
Extents ext(args...);
val += ext.extent(0);
}
if(val<1.0*R*10) std::terminate();
}
}
BENCHMARK_CAPTURE(
extents_construction, int_int_int, stdex::dextents<int,3>(), 100000000, 100, 100, 100
);
BENCHMARK_CAPTURE(
extents_construction, array_int_3, stdex::dextents<int,3>(), 100000000, std::array<int,3>{100, 100, 100}
); In -O0 the new extents are faster. In O1 and above they are the same. O0:
O3:
|
Compile time test construction: #include<experimental/mdspan>
constexpr size_t dyn = std::experimental::dynamic_extent;
template<class IndexT, size_t R, size_t ... Args>
struct ExtentsConstructor {
using extents_t = std::experimental::extents<IndexT, R, Args...>;
template<class ... DynArgs>
static constexpr size_t value (DynArgs ... args) {
extents_t ext(R,args...);
return ext.extent(0) + ExtentsConstructor<IndexT, R-1, Args...>::value(args...);
}
};
template<class IndexT, size_t ... Args>
struct ExtentsConstructor<IndexT, 0, Args...> {
template<class ... DynArgs>
static constexpr size_t value (DynArgs ... args) { return 0; }
};
size_t foo(int val1) {
size_t value = 0;
value += ExtentsConstructor<int32_t, 120, 3, 4, 5>::value(val1,4,5);
value += ExtentsConstructor<uint32_t, 120, 3, 4, 5>::value(val1,4,5);
value += ExtentsConstructor<int64_t, 120, 3, 4, 5>::value(val1,4,5);
value += ExtentsConstructor<uint64_t, 120, 3, 4, 5>::value(val1,4,5);
value += ExtentsConstructor<int32_t, 120, dyn,dyn,dyn>::value(val1,val1,val1);
value += ExtentsConstructor<uint32_t, 120, dyn,dyn,dyn>::value(val1,val1,val1);
value += ExtentsConstructor<int64_t, 120, dyn,dyn,dyn>::value(val1,val1,val1);
value += ExtentsConstructor<uint64_t, 120, dyn,dyn,dyn>::value(val1,val1,val1);
return value;
} Simply compile as object file: GCC 11.1 Old: 5.4s New 4.8s So looks like compile time for construction went down. |
Compile time access benchmark: #include<experimental/mdspan>
constexpr size_t dyn = std::experimental::dynamic_extent;
template<class Extents, size_t R>
struct ExtentsConstructor {
static constexpr size_t value (const Extents& ext) {
return ext.extent(static_cast<int32_t>(R%Extents::rank())) +
ext.extent(static_cast<int64_t>(R%Extents::rank())) +
ext.extent(static_cast<uint32_t>(R%Extents::rank())) +
ext.extent(static_cast<uint64_t>(R%Extents::rank())) +
+ ExtentsConstructor<Extents, R-1>::value(ext);
}
};
template<class Extents>
struct ExtentsConstructor<Extents, 0> {
static constexpr size_t value (const Extents&) { return 0; }
};
size_t foo(int val1) {
size_t value = 0;
{
using ext_t = std::experimental::extents<int32_t, 3, 4, 5>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
{
using ext_t = std::experimental::extents<int64_t, 3, 4, 5>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
{
using ext_t = std::experimental::extents<uint32_t, 3, 4, 5>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
{
using ext_t = std::experimental::extents<uint64_t, 3, 4, 5>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
{
using ext_t = std::experimental::extents<int32_t, dyn, dyn, dyn>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
{
using ext_t = std::experimental::extents<int64_t, dyn, dyn, dyn>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
{
using ext_t = std::experimental::extents<uint32_t, dyn, dyn, dyn>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
{
using ext_t = std::experimental::extents<uint64_t, dyn, dyn, dyn>;
value += ExtentsConstructor<ext_t, 120>::value(ext_t(val1,4,5));
}
return value;
} GCC 11.1 Old: 0.95 New 0.89 |
I also ran the existing benchmarks and didn't see anything bad. So in general I think this is good to go. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a lovely improvement -- it's much easier to read! Thank you for doing this!
My only concern is minor, that functions like get
sometimes take int
and sometimes take size_t
. This could lead to conversion warnings.
Co-Authored-By: Wesley Maxey <71408887+wmaxey@users.noreply.github.com>
f3ad21e
to
cd39b4c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't love it but it is mostly fine
@@ -272,7 +239,11 @@ struct layout_stride { | |||
#else | |||
: __base_t(__base_t{__member_pair_t( | |||
#endif | |||
#ifndef _MDSPAN_NEW_EXTENT2S |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this macro?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
leftover from when you could switch in the PR between the old and new implementation, gonna remove.
else | ||
return 0; | ||
} | ||
template <size_t r> MDSPAN_INLINE_FUNCTION constexpr static size_t get() { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This overload is never used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I still kinda would go for consistency here across the different things which looks similar?
constexpr static T get(size_t) { return T(); } | ||
template <size_t> MDSPAN_INLINE_FUNCTION constexpr static T get() { | ||
return T(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Calling get
on an empty array is definitely a precondition violation. Thus, would you consider using __builtin_unreachable()
with GCC, or std::unreachable
with C++23, with functions like this? This works fine in practice with constexpr
functions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I really don't want to add ifdefs and builtins inside this implementation only code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should at least assert since it will be called on out of bounds access and static_assert
where possible
: __storage_{ | ||
#else | ||
: __base_t(__base_t{typename __base_t::__stored_type{ | ||
constexpr maybe_static_array(const std::span<T, N> &vals) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we consider passing span
by value instead of by const reference?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not sure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think my biggest issue is returning values if the size of maybe_static_array
is 0. I would rather specialize and maybe even assert if the get functions are accessed
constexpr static size_t m_size = sizeof...(Values); | ||
constexpr static size_t m_size_dynamic = | ||
_MDSPAN_FOLD_PLUS_RIGHT((Values == dyn_tag), 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These can be moved into the respective functions
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
actually m_size_dynamic is used right below here, I guess if I moved that member to the end so the constexpr accessor functions are already defined? But I don't see a big benefit since they are anyway private.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's just less code. I'm not dead set on this
MDSPAN_TEMPLATE_REQUIRES(class T, size_t N, | ||
/* requires */ (N == m_size_dynamic && N == 0)) | ||
MDSPAN_INLINE_FUNCTION | ||
explicit constexpr extents(Integral... exts) noexcept | ||
#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) | ||
: __storage_{ | ||
#else | ||
: __base_t(__base_t{typename __base_t::__stored_type{ | ||
constexpr maybe_static_array(const std::array<T, N> &) : m_dyn_vals{} {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MDSPAN_TEMPLATE_REQUIRES(class T, size_t N, | |
/* requires */ (N == m_size_dynamic && N == 0)) | |
MDSPAN_INLINE_FUNCTION | |
explicit constexpr extents(Integral... exts) noexcept | |
#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) | |
: __storage_{ | |
#else | |
: __base_t(__base_t{typename __base_t::__stored_type{ | |
constexpr maybe_static_array(const std::array<T, N> &) : m_dyn_vals{} {} | |
MDSPAN_TEMPLATE_REQUIRES(class T, | |
/* requires */ (m_size_dynamic == 0)) | |
MDSPAN_INLINE_FUNCTION | |
constexpr maybe_static_array(const std::array<T, 0> &) : m_dyn_vals{} {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't work because of std::array not working on device.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh weird, my diff got messed up idk why it grabbed some of the old code. The only change here was to simplify
MDSPAN_TEMPLATE_REQUIRES(class T, size_t N,
/* requires */ (N == m_size_dynamic && N == 0))
MDSPAN_INLINE_FUNCTION
constexpr maybe_static_array(const std::array<T, N> &) : m_dyn_vals{} {}
to
MDSPAN_TEMPLATE_REQUIRES(class T,
/* requires */ (m_size_dynamic == 0))
MDSPAN_INLINE_FUNCTION
constexpr maybe_static_array(const std::array<T, 0> &) : m_dyn_vals{} {}
-- you are already using array here so this isn't introducing it in a new place
template<class OtherIndexType, size_t... RHS> | ||
template <class IndexType, size_t... Extents> class extents { | ||
public: | ||
// typedefs for integral types used |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we consider checking the Mandate that IndexType
is a signed or unsigned integral type? That would effectively make the code self-documenting, so that the comment would become superfluous.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I can do that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi Christian! This is much more concise; I like it! I just made a few optional suggestions.
Co-authored-by: Mark Hoemmen <mhoemmen@users.noreply.github.com> Co-authored-by: Nicolas Morales <nmmoral@sandia.gov>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think overall I would want the out of bounds checks to be asserted/static_asserted when able but otherwise looks good!
constexpr static T get(size_t) { return T(); } | ||
template <size_t> MDSPAN_INLINE_FUNCTION constexpr static T get() { | ||
return T(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should at least assert since it will be called on out of bounds access and static_assert
where possible
constexpr static size_t m_size = sizeof...(Values); | ||
constexpr static size_t m_size_dynamic = | ||
_MDSPAN_FOLD_PLUS_RIGHT((Values == dyn_tag), 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's just less code. I'm not dead set on this
MDSPAN_TEMPLATE_REQUIRES(class T, size_t N, | ||
/* requires */ (N == m_size_dynamic && N == 0)) | ||
MDSPAN_INLINE_FUNCTION | ||
explicit constexpr extents(Integral... exts) noexcept | ||
#if defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) | ||
: __storage_{ | ||
#else | ||
: __base_t(__base_t{typename __base_t::__stored_type{ | ||
constexpr maybe_static_array(const std::array<T, N> &) : m_dyn_vals{} {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh weird, my diff got messed up idk why it grabbed some of the old code. The only change here was to simplify
MDSPAN_TEMPLATE_REQUIRES(class T, size_t N,
/* requires */ (N == m_size_dynamic && N == 0))
MDSPAN_INLINE_FUNCTION
constexpr maybe_static_array(const std::array<T, N> &) : m_dyn_vals{} {}
to
MDSPAN_TEMPLATE_REQUIRES(class T,
/* requires */ (m_size_dynamic == 0))
MDSPAN_INLINE_FUNCTION
constexpr maybe_static_array(const std::array<T, 0> &) : m_dyn_vals{} {}
-- you are already using array here so this isn't introducing it in a new place
public static_array_impl<0, T, Values...> { | ||
|
||
template <class ThisIndexType, size_t... Extents> | ||
class extents | ||
#if !defined(_MDSPAN_USE_ATTRIBUTE_NO_UNIQUE_ADDRESS) | ||
: private detail::__no_unique_address_emulation< | ||
detail::__partially_static_sizes_tagged<detail::__extents_tag, ThisIndexType , size_t, Extents...>> | ||
#endif | ||
{ | ||
public: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very minor point, but both public
s here are redundant
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The above thing is not the same (where you say m_size_dynamic==0). Think N =1 and m_size_dynamic = 0, your case would take it, whats there right now doesn.t
9396793
to
c93cd34
Compare
@nmm0 wrote:
regarding template <size_t> MDSPAN_INLINE_FUNCTION constexpr static T get() {
return T();
} another approach would be to use the C++23 feature |
Co-authored-by: Mark Hoemmen <mhoemmen@users.noreply.github.com>
…product_kokkos_impl [ready] kokkos impl `triangular_matrix_{left,right}_product`
This replaces the extents implementation with something significanlty simpler. Need to do some more performance testing, but this is ready for review.